{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Recommender Systems 2020/21\n",
    "\n",
    "### Practice - Hybrid model with LightFM on MovieLens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Movielens10M: Verifying data consistency...\n",
      "Movielens10M: Verifying data consistency... Passed!\n",
      "DataReader: current dataset is: <class 'Data_manager.Dataset.Dataset'>\n",
      "\tNumber of items: 10681\n",
      "\tNumber of users: 69878\n",
      "\tNumber of interactions in URM_all: 10000054\n",
      "\tValue range in URM_all: 0.50-5.00\n",
      "\tInteraction density: 1.34E-02\n",
      "\tInteractions per user:\n",
      "\t\t Min: 2.00E+01\n",
      "\t\t Avg: 1.43E+02\n",
      "\t\t Max: 7.36E+03\n",
      "\tInteractions per item:\n",
      "\t\t Min: 0.00E+00\n",
      "\t\t Avg: 9.36E+02\n",
      "\t\t Max: 3.49E+04\n",
      "\tGini Index: 0.57\n",
      "\n",
      "\tICM name: ICM_genres, Value range: 1.00 / 1.00, Num features: 20, feature occurrences: 21564, density 1.01E-01\n",
      "\tICM name: ICM_tags, Value range: 1.00 / 69.00, Num features: 10217, feature occurrences: 108563, density 9.95E-04\n",
      "\tICM name: ICM_all, Value range: 1.00 / 69.00, Num features: 10237, feature occurrences: 130127, density 1.19E-03\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from Notebooks_utils.data_splitter import train_test_holdout\n",
    "from Data_manager.Movielens.Movielens10MReader import Movielens10MReader\n",
    "\n",
    "# Load the Movielens10M dataset through the course Data_manager reader.\n",
    "data_reader = Movielens10MReader()\n",
    "data_loaded = data_reader.load_data()\n",
    "\n",
    "# URM_all holds every user-item interaction; ICM_genres is the item feature\n",
    "# matrix registered under the name \"ICM_genres\" (used later as LightFM item features).\n",
    "URM_all = data_loaded.get_URM_all()\n",
    "ICM_genres = data_loaded.get_ICM_from_name(\"ICM_genres\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Base.Evaluation.Evaluator import EvaluatorHoldout\n",
    "\n",
    "# Two nested holdout splits: first (train + validation) vs test, then train vs validation.\n",
    "# NOTE(review): train_perc = 0.8 presumably keeps ~80% of the interactions in the first\n",
    "# returned matrix - confirm against Notebooks_utils.data_splitter. No seed is set here,\n",
    "# so the split may differ between runs - TODO confirm.\n",
    "URM_train_validation, URM_test = train_test_holdout(URM_all, train_perc = 0.8)\n",
    "URM_train, URM_validation = train_test_holdout(URM_train_validation, train_perc = 0.8)\n",
    "\n",
    "# One evaluator per held-out matrix; both compute their metrics at cutoff 10 only.\n",
    "evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])\n",
    "evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "## A pure collaborative filtering model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\ferra\\Anaconda3\\envs\\RecSysCourse\\lib\\site-packages\\lightfm\\_lightfm_fast.py:10: UserWarning: LightFM was compiled without OpenMP support. Only a single thread will be used.\n",
      "  \"LightFM was compiled without OpenMP support. \"\n"
     ]
    }
   ],
   "source": [
    "## In order to evaluate put it in a recommender class\n",
    "from Base.BaseRecommender import BaseRecommender\n",
    "from lightfm import LightFM\n",
    "import numpy as np\n",
    "\n",
    "class LightFMWrapper(BaseRecommender):\n",
    "    \"\"\"Pure collaborative-filtering LightFM model behind the BaseRecommender interface.\n",
    "\n",
    "    Wrapping the model in a recommender class lets it be passed to\n",
    "    EvaluatorHoldout.evaluateRecommender.\n",
    "    \"\"\"\n",
    "\n",
    "    RECOMMENDER_NAME = \"LightFMWrapper\"\n",
    "\n",
    "    def __init__(self, URM_train):\n",
    "        super(LightFMWrapper, self).__init__(URM_train)\n",
    "\n",
    "\n",
    "    def fit(self, ITEM_ALPHA, NUM_COMPONENTS, NUM_EPOCHS, NUM_THREADS):\n",
    "        \"\"\"Train a LightFM model with WARP loss on this recommender's own URM.\n",
    "\n",
    "        :param ITEM_ALPHA: forwarded to LightFM as item_alpha\n",
    "        :param NUM_COMPONENTS: forwarded to LightFM as no_components\n",
    "        :param NUM_EPOCHS: forwarded to LightFM.fit as epochs\n",
    "        :param NUM_THREADS: forwarded to LightFM.fit as num_threads\n",
    "        \"\"\"\n",
    "\n",
    "        # Let's fit a WARP model\n",
    "        self.lightFM_model = LightFM(loss='warp',\n",
    "                                     item_alpha=ITEM_ALPHA,\n",
    "                                     no_components=NUM_COMPONENTS)\n",
    "\n",
    "        # Train on the matrix this instance was built with, not on whatever global\n",
    "        # named URM_train happens to live in the notebook namespace.\n",
    "        # self.URM_train is expected to be set by BaseRecommender.__init__.\n",
    "        self.lightFM_model = self.lightFM_model.fit(self.URM_train,\n",
    "                                       epochs=NUM_EPOCHS,\n",
    "                                       num_threads=NUM_THREADS)\n",
    "\n",
    "\n",
    "    def _compute_item_score(self, user_id_array, items_to_compute = None):\n",
    "        \"\"\"Score items for each requested user.\n",
    "\n",
    "        :param user_id_array: sequence of user ids, one output row per user\n",
    "        :param items_to_compute: optional item ids to score; None scores every item\n",
    "        :return: array of shape (len(user_id_array), n_items); items that were not\n",
    "                 requested keep a score of -inf\n",
    "        \"\"\"\n",
    "\n",
    "        if items_to_compute is None:\n",
    "            items_to_compute = np.arange(self.n_items, dtype=np.int32)\n",
    "        else:\n",
    "            items_to_compute = np.asarray(items_to_compute, dtype=np.int32)\n",
    "\n",
    "        # Start from -inf everywhere so that items outside items_to_compute can never be ranked\n",
    "        item_scores = - np.ones((len(user_id_array), self.n_items)) * np.inf\n",
    "\n",
    "        # Nothing to score: avoid calling predict with an empty id array\n",
    "        if items_to_compute.size == 0:\n",
    "            return item_scores\n",
    "\n",
    "        for user_index, user_id in enumerate(user_id_array):\n",
    "            # Write only the requested columns: predict returns one score per requested\n",
    "            # item, which does not fill a whole row when a subset of items is requested.\n",
    "            item_scores[user_index, items_to_compute] = self.lightFM_model.predict(int(user_id),\n",
    "                                                                                   items_to_compute)\n",
    "\n",
    "        return item_scores\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LightFMWrapper: URM Detected 67 (0.63 %) cold items.\n",
      "EvaluatorHoldout: Processed 23000 ( 33.03% ) in 30.86 sec. Users per second: 745\n",
      "EvaluatorHoldout: Processed 45000 ( 64.62% ) in 1.02 min. Users per second: 736\n",
      "EvaluatorHoldout: Processed 67000 ( 96.22% ) in 1.52 min. Users per second: 733\n",
      "EvaluatorHoldout: Processed 69633 ( 100.00% ) in 1.59 min. Users per second: 732\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{10: {'ROC_AUC': 0.393381014156076,\n",
       "  'PRECISION': 0.1663880631310307,\n",
       "  'PRECISION_RECALL_MIN_DEN': 0.20443265588134776,\n",
       "  'RECALL': 0.12839842022035525,\n",
       "  'MAP': 0.10275651354390812,\n",
       "  'MRR': 0.3611644007049251,\n",
       "  'NDCG': 0.13380897842861825,\n",
       "  'F1': 0.14494534624970035,\n",
       "  'HIT_RATE': 1.6638806313098675,\n",
       "  'ARHR': 0.5478780928308715,\n",
       "  'NOVELTY': 0.00834751945953207,\n",
       "  'AVERAGE_POPULARITY': 0.6355808532649352,\n",
       "  'DIVERSITY_MEAN_INTER_LIST': 0.8406645637575517,\n",
       "  'DIVERSITY_HERFINDAHL': 0.9840652490967872,\n",
       "  'COVERAGE_ITEM': 0.044939612395843084,\n",
       "  'COVERAGE_ITEM_CORRECT': 0.03960303342383672,\n",
       "  'COVERAGE_USER': 0.99649388935001,\n",
       "  'COVERAGE_USER_CORRECT': 0.6945390537794441,\n",
       "  'DIVERSITY_GINI': 0.006947374690823262,\n",
       "  'SHANNON_ENTROPY': 6.605857170229383}}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Set the number of threads; you can increase this\n",
    "# if you have more physical cores available.\n",
    "# NOTE: when LightFM is built without OpenMP support it warns at import time that\n",
    "# only a single thread will be used, whatever value is set here.\n",
    "NUM_THREADS = 4\n",
    "# Forwarded by LightFMWrapper.fit to LightFM as no_components\n",
    "NUM_COMPONENTS = 10\n",
    "# Forwarded by LightFMWrapper.fit to LightFM.fit as epochs\n",
    "NUM_EPOCHS = 3\n",
    "# Forwarded by LightFMWrapper.fit to LightFM as item_alpha\n",
    "ITEM_ALPHA = 1e-6\n",
    "\n",
    "# Fit the pure collaborative model on the training interactions\n",
    "recommender = LightFMWrapper(URM_train)\n",
    "recommender.fit(ITEM_ALPHA, NUM_COMPONENTS, NUM_EPOCHS, NUM_THREADS)\n",
    "\n",
    "# evaluateRecommender returns a pair; the first element is a dict keyed by cutoff\n",
    "result_dict, _ = evaluator_validation.evaluateRecommender(recommender)\n",
    "result_dict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A hybrid model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "## In order to evaluate put it in a dummy recommender class\n",
    "from Base.BaseRecommender import BaseRecommender\n",
    "from lightfm import LightFM\n",
    "import numpy as np\n",
    "import scipy.sparse as sps\n",
    "\n",
    "\n",
    "class LightFMWrapper(BaseRecommender):\n",
    "    \"\"\"Hybrid LightFM model (interactions + item features) behind the BaseRecommender interface.\n",
    "\n",
    "    Wrapping the model in a recommender class lets it be passed to\n",
    "    EvaluatorHoldout.evaluateRecommender.\n",
    "    \"\"\"\n",
    "\n",
    "    RECOMMENDER_NAME = \"LightFMWrapper\"\n",
    "\n",
    "    def __init__(self, URM_train, ICM_train, add_identity_features = True):\n",
    "        \"\"\"\n",
    "        :param URM_train: user-item interaction matrix, handed to BaseRecommender\n",
    "        :param ICM_train: sparse item-feature matrix with one row per item\n",
    "        :param add_identity_features: when True (default) an identity block is prepended\n",
    "               to the ICM so that every item keeps its own latent vector in addition to\n",
    "               the content features. Pass False to use the ICM alone (content-only).\n",
    "        \"\"\"\n",
    "        super(LightFMWrapper, self).__init__(URM_train)\n",
    "\n",
    "        # Private float32 CSR copy, so the caller's matrix is never modified\n",
    "        item_features = sps.csr_matrix(ICM_train, dtype=np.float32, copy=True)\n",
    "\n",
    "        if add_identity_features:\n",
    "            # LightFM uses per-item identity features only when no item_features are given.\n",
    "            # Passing the ICM alone replaces them, and the model can then only tell items\n",
    "            # apart through their content features. Stacking an identity block next to the\n",
    "            # ICM keeps the collaborative part of the model.\n",
    "            n_items = item_features.shape[0]\n",
    "            identity_features = sps.identity(n_items, dtype=np.float32, format='csr')\n",
    "            item_features = sps.hstack([identity_features, item_features], format='csr')\n",
    "\n",
    "        self.ICM_train = item_features\n",
    "\n",
    "\n",
    "    def fit(self, ITEM_ALPHA, NUM_COMPONENTS, NUM_EPOCHS, NUM_THREADS):\n",
    "        \"\"\"Train a LightFM model with WARP loss on this recommender's own URM and item features.\n",
    "\n",
    "        :param ITEM_ALPHA: forwarded to LightFM as item_alpha\n",
    "        :param NUM_COMPONENTS: forwarded to LightFM as no_components\n",
    "        :param NUM_EPOCHS: forwarded to LightFM.fit as epochs\n",
    "        :param NUM_THREADS: forwarded to LightFM.fit as num_threads\n",
    "        \"\"\"\n",
    "\n",
    "        # Let's fit a WARP model\n",
    "        self.lightFM_model = LightFM(loss='warp',\n",
    "                                     item_alpha=ITEM_ALPHA,\n",
    "                                     no_components=NUM_COMPONENTS)\n",
    "\n",
    "        # Train on the matrix this instance was built with, not on whatever global\n",
    "        # named URM_train happens to live in the notebook namespace.\n",
    "        # self.URM_train is expected to be set by BaseRecommender.__init__.\n",
    "        self.lightFM_model = self.lightFM_model.fit(self.URM_train,\n",
    "                                       item_features=self.ICM_train,\n",
    "                                       epochs=NUM_EPOCHS,\n",
    "                                       num_threads=NUM_THREADS)\n",
    "\n",
    "\n",
    "    def _compute_item_score(self, user_id_array, items_to_compute = None):\n",
    "        \"\"\"Score items for each requested user.\n",
    "\n",
    "        :param user_id_array: sequence of user ids, one output row per user\n",
    "        :param items_to_compute: optional item ids to score; None scores every item\n",
    "        :return: array of shape (len(user_id_array), n_items); items that were not\n",
    "                 requested keep a score of -inf\n",
    "        \"\"\"\n",
    "\n",
    "        if items_to_compute is None:\n",
    "            items_to_compute = np.arange(self.n_items, dtype=np.int32)\n",
    "        else:\n",
    "            items_to_compute = np.asarray(items_to_compute, dtype=np.int32)\n",
    "\n",
    "        # Start from -inf everywhere so that items outside items_to_compute can never be ranked\n",
    "        item_scores = - np.ones((len(user_id_array), self.n_items)) * np.inf\n",
    "\n",
    "        # Nothing to score: avoid calling predict with an empty id array\n",
    "        if items_to_compute.size == 0:\n",
    "            return item_scores\n",
    "\n",
    "        for user_index, user_id in enumerate(user_id_array):\n",
    "            # The item ids index the rows of item_features, so the full feature matrix is\n",
    "            # always passed; slicing it would pair the ids with the wrong rows.\n",
    "            # Only the requested columns of the output row are written.\n",
    "            item_scores[user_index, items_to_compute] = self.lightFM_model.predict(int(user_id),\n",
    "                                                                                   items_to_compute,\n",
    "                                                                                   item_features = self.ICM_train)\n",
    "\n",
    "        return item_scores\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LightFMWrapper: URM Detected 67 (0.63 %) cold items.\n",
      "EvaluatorHoldout: Processed 21000 ( 30.16% ) in 31.04 sec. Users per second: 677\n",
      "EvaluatorHoldout: Processed 42000 ( 60.32% ) in 1.04 min. Users per second: 673\n",
      "EvaluatorHoldout: Processed 62000 ( 89.04% ) in 1.55 min. Users per second: 668\n",
      "EvaluatorHoldout: Processed 69633 ( 100.00% ) in 1.75 min. Users per second: 663\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{10: {'ROC_AUC': 0.03868070329726443,\n",
       "  'PRECISION': 0.009738198842503827,\n",
       "  'PRECISION_RECALL_MIN_DEN': 0.010646902957661701,\n",
       "  'RECALL': 0.0044830359369395344,\n",
       "  'MAP': 0.0027995338146300694,\n",
       "  'MRR': 0.02238778981594856,\n",
       "  'NDCG': 0.0035187683171075046,\n",
       "  'F1': 0.006139649059885138,\n",
       "  'HIT_RATE': 0.09738198842502836,\n",
       "  'ARHR': 0.024103212101359135,\n",
       "  'NOVELTY': 0.01317335684681066,\n",
       "  'AVERAGE_POPULARITY': 0.06398865119726148,\n",
       "  'DIVERSITY_MEAN_INTER_LIST': 0.8191545897758553,\n",
       "  'DIVERSITY_HERFINDAHL': 0.9819142825891063,\n",
       "  'COVERAGE_ITEM': 0.0607621009268795,\n",
       "  'COVERAGE_ITEM_CORRECT': 0.025653028742627097,\n",
       "  'COVERAGE_USER': 0.99649388935001,\n",
       "  'COVERAGE_USER_CORRECT': 0.08579238100689773,\n",
       "  'DIVERSITY_GINI': 0.00706576386401895,\n",
       "  'SHANNON_ENTROPY': 6.5441175554756885}}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the hybrid wrapper with the genre ICM as item features, reusing the hyperparameters\n",
    "# of the pure collaborative run. This rebinds `recommender` and `result_dict`, so the\n",
    "# collaborative model and its metrics are no longer reachable through those names.\n",
    "recommender = LightFMWrapper(URM_train, ICM_genres)\n",
    "recommender.fit(ITEM_ALPHA, NUM_COMPONENTS, NUM_EPOCHS, NUM_THREADS)\n",
    "\n",
    "# Same validation evaluator as before, so the metrics are comparable with the previous run\n",
    "result_dict, _ = evaluator_validation.evaluateRecommender(recommender)\n",
    "result_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
